1 import java.util.regex.*;
2 import java.util.*;
3 import java.io.*;
4
/**
 * Command-line generator that reads a script-range listing and prints Java
 * source for a {@code Scripts} class to standard output.
 *
 * <p>Each recognised input line has the shape
 * {@code START[..END] ; ScriptName # comment}, where START and END are
 * hexadecimal code points. Lines of length 0 or 1, lines starting with
 * {@code #}, and lines that do not match that shape are skipped.
 *
 * <p>The output consists of, in order:
 * <ol>
 *   <li>one {@code "%05X NAME"} line per code point of every range read;</li>
 *   <li>a {@code Scripts} class holding a {@code UnicodeScript} enum and the
 *       parallel {@code scriptStarts} / {@code scripts} tables.</li>
 * </ol>
 */
public class CharacterScript {

    /** Shape of a data line: {@code hex[..hex] ; Name # anything}. */
    private static final Pattern RANGE_LINE = Pattern.compile(
        "(\\p{XDigit}+)(?:\\.{2}(\\p{XDigit}+))?\\s+;\\s+(\\w+)\\s+#.*");

    /** Script index stored in a range record that belongs to no listed script. */
    private static final int UNKNOWN_SCRIPT = -1;

    /** Test hook; intentionally a no-op. */
    static void fortest(String fmt, Object... o) {
    }

    /** Writes formatted text to standard output. */
    static void print(String fmt, Object... o) {
        System.out.printf(fmt, o);
    }

    /** Debug hook; intentionally a no-op (fill in to trace the generator). */
    static void debug(String fmt, Object... o) {
    }

    /**
     * Entry point.
     *
     * @param args exactly one element: the path of the script-range file.
     *             Any other argument count prints a usage line and exits
     *             with status 1.
     */
    public static void main(String args[]) {
        try {
            if (args.length != 1) {
                // Only one argument is accepted; the generated source goes to stdout.
                System.out.println("java CharacterScript script.txt > out");
                System.exit(1);
            }

            Map<String, Integer> scriptIds = new HashMap<String, Integer>();
            List<int[]> ranges = readRanges(args[0], scriptIds);
            if (ranges.isEmpty()) {
                // Nothing to generate from; report it instead of failing later
                // with a NullPointerException on a missing script name.
                System.err.printf("Error: no script ranges found in <%s>%n", args[0]);
                return;
            }

            debug("-----------------%n");
            debug("Total scripts=%s%n", scriptIds.size());
            debug("-----------------%n%n");

            // Invert name -> index into an index -> name table.
            String[] names = new String[scriptIds.size()];
            for (Map.Entry<String, Integer> e : scriptIds.entrySet()) {
                names[e.getValue().intValue()] = e.getKey();
            }

            // Per-code-point dump, in the order the ranges were read.
            for (int[] r : ranges) {
                String name = names[r[2]].toUpperCase(Locale.ENGLISH);
                for (int cp = r[0]; cp <= r[1]; cp++) {
                    print("%05X %s%n", cp, name);
                }
            }

            // Stable sort by range start.
            Collections.sort(ranges, new Comparator<int[]>() {
                @Override
                public int compare(int[] a, int[] b) {
                    return a[0] < b[0] ? -1 : (a[0] == b[0] ? 0 : 1);
                }
            });

            List<int[]> table = fillGaps(ranges);

            for (int[] a : table) {
                debug("0x%05x, 0x%05x %s%n", a[0], a[1], displayName(a, names));
            }
            debug("--->total=%d%n", table.size());

            emitScriptsClass(names, table);

        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Reads the range file and returns one {@code {start, end, scriptIndex}}
     * record per run of adjacent lines that name the same script and whose
     * code points are contiguous.
     *
     * @param path      file to read
     * @param scriptIds receives script name to index, indices assigned in order
     *                  of first appearance
     * @return the records in file order; empty if no line matched
     * @throws IOException if the file cannot be opened or read
     */
    private static List<int[]> readRanges(String path, Map<String, Integer> scriptIds)
            throws IOException {
        List<int[]> ranges = new ArrayList<int[]>();
        BufferedReader in = new BufferedReader(new FileReader(path));
        try {
            Matcher m = RANGE_LINE.matcher("");
            int prevS = -1;
            int prevE = -1;
            String prevN = null;   // null until the first data line is seen
            String line;
            while ((line = in.readLine()) != null) {
                if (line.length() <= 1 || line.charAt(0) == '#') {
                    continue;
                }
                m.reset(line);
                if (!m.matches()) {
                    debug("Warning: Unrecognized line <%s>%n", line);
                    continue;
                }
                int start = Integer.parseInt(m.group(1), 16);
                int end = (m.group(2) == null) ? start : Integer.parseInt(m.group(2), 16);
                String name = m.group(3);
                if (name.equals(prevN) && start == prevE + 1) {
                    // Same script, directly adjacent: grow the pending run.
                    prevE = end;
                } else {
                    if (prevN != null) {
                        addRange(ranges, scriptIds, prevS, prevE, prevN);
                    }
                    debug("%x-%x\t%s%n", prevS, prevE, prevN);
                    prevS = start;
                    prevE = end;
                    prevN = name;
                }
            }
            // Flush the run that was still pending at end of file.
            if (prevN != null) {
                addRange(ranges, scriptIds, prevS, prevE, prevN);
                debug("%x-%x\t%s%n", prevS, prevE, prevN);
            }
        } finally {
            in.close();
        }
        return ranges;
    }

    /** Appends one range record, assigning the script the next free index if it is new. */
    private static void addRange(List<int[]> ranges, Map<String, Integer> scriptIds,
                                 int start, int end, String name) {
        Integer id = scriptIds.get(name);
        if (id == null) {
            id = Integer.valueOf(scriptIds.size());
            scriptIds.put(name, id);
        }
        ranges.add(new int[] { start, end, id.intValue() });
    }

    /**
     * Builds the final table from ranges sorted by start. For each gap between
     * consecutive ranges:
     * <ul>
     *   <li>if any code point in the gap is not {@code Character.UNASSIGNED},
     *       the whole gap becomes an UNKNOWN range;</li>
     *   <li>otherwise, if both neighbours are the same script, the later range
     *       is folded into the earlier one;</li>
     *   <li>otherwise the earlier range is stretched to cover the gap.</li>
     * </ul>
     * Records of {@code sorted} are modified in place when stretched.
     *
     * NOTE(review): code points below the first range's start are not given an
     * entry here; confirm the input always starts at 0.
     *
     * @param sorted non-empty list of range records ordered by start
     * @return the table of range records to emit
     */
    private static List<int[]> fillGaps(List<int[]> sorted) {
        List<int[]> table = new ArrayList<int[]>();
        int[] last = sorted.get(0);
        table.add(last);
        for (int i = 1; i < sorted.size(); i++) {
            int[] cur = sorted.get(i);
            if (cur[0] != last[1] + 1) {
                if (hasAssigned(last[1] + 1, cur[0])) {
                    table.add(new int[] { last[1] + 1, cur[0] - 1, UNKNOWN_SCRIPT });
                } else if (last[2] == cur[2]) {
                    last[1] = cur[1];
                    continue;
                } else {
                    last[1] = cur[0] - 1;
                }
            }
            table.add(cur);
            last = cur;
        }
        return table;
    }

    /** Returns true if any code point in {@code [from, to)} is not UNASSIGNED. */
    private static boolean hasAssigned(int from, int to) {
        for (int cp = from; cp < to; cp++) {
            if (Character.getType(cp) != Character.UNASSIGNED) {
                debug("Warning: [%x] is ASSIGNED but in NON script%n", cp);
                return true;
            }
        }
        return false;
    }

    /** Upper-cased script name for a record, or "UNKNOWN" for a gap record. */
    private static String displayName(int[] range, String[] names) {
        if (range[2] == UNKNOWN_SCRIPT) {
            return "UNKNOWN";
        }
        return names[range[2]].toUpperCase(Locale.ENGLISH);
    }

    /**
     * Prints the {@code Scripts} class: the enum constants in index order plus
     * UNKNOWN, then the {@code scriptStarts} and {@code scripts} tables. If the
     * last range stops short of {@code Character.MAX_CODE_POINT}, a closing
     * UNKNOWN entry is appended to both tables.
     */
    private static void emitScriptsClass(String[] names, List<int[]> table) {
        print("public class Scripts {%n%n");
        print(" public static enum UnicodeScript {%n");
        for (int i = 0; i < names.length; i++) {
            print(" /**%n * Unicode script \"%s\".%n */%n", names[i]);
            print(" %s,%n%n", names[i].toUpperCase(Locale.ENGLISH));
        }
        print(" /**%n * Unicode script \"Unknown\".%n */%n UNKNOWN;%n%n");

        print(" private static final int[] scriptStarts = {%n");
        for (int[] a : table) {
            String name = displayName(a, names);
            if (a[0] < 0x10000) {
                print(" 0x%04X, // %04X..%04X; %s%n",
                      a[0], a[0], a[1], name);
            } else {
                print(" 0x%05X, // %05X..%05X; %s%n",
                      a[0], a[0], a[1], name);
            }
        }
        int[] last = table.get(table.size() - 1);
        boolean needsTail = last[1] != Character.MAX_CODE_POINT;
        if (needsTail) {
            print(" 0x%05X // %05X..%06X; %s%n",
                  last[1] + 1, last[1] + 1, Character.MAX_CODE_POINT,
                  "UNKNOWN");
        }
        print("%n };%n%n");

        print(" private static final UnicodeScript[] scripts = {%n");
        for (int[] a : table) {
            print(" %s,%n", displayName(a, names));
        }
        if (needsTail) {
            print(" UNKNOWN%n");
        }
        print(" };%n");
        print(" }%n");
        print("}%n");
    }
}